# ==============================================================================
# Replication script for the simulations in Leifeld & Cranmer (2018)
# Last change: 2018-01-07
# 
# Replication of: Snijders, Tom A. B., Gerhard G. van de Bunt and Christian 
# Steglich (2009): Introduction to Stochastic Actor-Based Models for Network 
# Dynamics. Social Networks 32: 44--66.
# 
# Download original paper: http://dx.doi.org/10.1016/j.socnet.2009.02.004
# Download dataset: http://www.stats.ox.ac.uk/~snijders/siena/klas12b.zip
# Description: http://www.stats.ox.ac.uk/~snijders/siena/tutorial2010_data.htm
# ==============================================================================


# ==============================================================================
# LOAD PACKAGES AND SET RANDOM SEED
# ==============================================================================

library("RSiena")
library("network")
library("sna")
library("ergm")
library("xergm")
library("texreg")
library("ggplot2")
library("gridExtra")
library("reshape2")

sessionInfo()
# R version 3.3.0 (2016-05-03)
# Platform: x86_64-pc-linux-gnu (64-bit)
# Running under: Ubuntu 16.04.3 LTS
# 
# locale:
#  [1] LC_CTYPE=en_GB.UTF-8       LC_NUMERIC=C              
#  [3] LC_TIME=en_GB.UTF-8        LC_COLLATE=en_GB.UTF-8    
#  [5] LC_MONETARY=en_GB.UTF-8    LC_MESSAGES=en_GB.UTF-8   
#  [7] LC_PAPER=en_GB.UTF-8       LC_NAME=C                 
#  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
# [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C       
# 
# attached base packages:
# [1] stats     graphics  grDevices utils     datasets  methods   base     
# 
# other attached packages:
#  [1] reshape2_1.4.1       gridExtra_2.0.0      texreg_1.36.23      
#  [4] xergm_1.8.2          GERGM_0.11.2         rem_1.1.2           
#  [7] tnam_1.6.5           btergm_1.9.0         ggplot2_2.0.0       
# [10] xergm.common_1.7.7   ergm_3.8.0           sna_2.4             
# [13] statnet.common_4.0.0 network_1.13.0       RSiena_1.2-3        
# 
# loaded via a namespace (and not attached):
#  [1] Rcpp_0.12.11         mvtnorm_1.0-5        lattice_0.20-33     
#  [4] muhaz_1.2.6          gtools_3.5.0         plyr_1.8.3          
#  [7] stats4_3.3.0         coda_0.18-1          gplots_2.17.0       
# [10] minqa_1.2.4          gdata_2.17.0         vegan_2.3-1         
# [13] nloptr_1.0.4         Matrix_1.2-6         ergm.count_3.2.0    
# [16] splines_3.3.0        lme4_1.1-10          stringr_1.2.0       
# [19] igraph_1.0.1         munsell_0.4.2        statnet_2016.4      
# [22] speedglm_0.3-1       mgcv_1.8-12          tcltk_3.3.0         
# [25] tergm_3.4.0          lpSolve_5.6.13       quadprog_1.5-5      
# [28] permute_0.8-4        MASS_7.3-45          bitops_1.0-6        
# [31] grid_3.3.0           nlme_3.1-128         gtable_0.1.2        
# [34] magrittr_1.5         scales_0.4.1         RcppParallel_4.3.20 
# [37] KernSmooth_2.23-15   stringi_1.1.5        ROCR_1.0-7          
# [40] latticeExtra_0.6-28  robustbase_0.92-5    boot_1.3-17         
# [43] trust_0.1-7          deSolve_1.12         RColorBrewer_1.1-2  
# [46] tools_3.3.0          networkDynamic_0.9.0 mstate_0.2.8        
# [49] DEoptimR_1.0-4       flexsurv_0.7         parallel_3.3.0      
# [52] survival_2.39-4      colorspace_1.2-6     cluster_2.0.4       
# [55] caTools_1.17.1

set.seed(12345)

R <- 5000                # number of bootstrap replications
nsim <- 500              # number of network simulations per GOF time step
parallel <- "multicore"  # parallel computation (no/multicore/snow)
ncpus <- 3               # number of computing cores

col_tergm <- "#00BA38"   # green
col_saom <- "#F8766D"    # red
col_pos <- "#619CFF"     # blue

# fix the row and column names
data("knecht")  # load the dataset
for (i in 1:length(friendship)) {
  rownames(friendship[[i]]) <- letters
  colnames(friendship[[i]]) <- letters
}
rownames(demographics) <- letters
rownames(primary) <- letters
colnames(primary) <- letters
rownames(delinquency) <- letters
sex <- demographics$sex
names(sex) <- letters


# ==============================================================================
# RSIENA REPLICATION
# ==============================================================================

# define composition change
comp <- rep(list(c(1, 4)), 26)
comp[[21]] <- c(1, 2)  # actor 21 drops out after the second time step
changes <- sienaCompositionChange(comp)

# set up data structure and covariates
siena.nets <- sienaNet(array(c(friendship[[1]], friendship[[2]], 
    friendship[[3]], friendship[[4]]), dim = c(26, 26, 4)))
primaryCov <- coDyadCovar(primary)
# use 'varDyadCovar' for time-varying dyadic cov (like the sienaNet command)
sexCov <- coCovar(demographics$sex)  # use 'varCovar' for time-varying nodal cov
ageCov <- coCovar(demographics$age)
ethnicityCov <- coCovar(demographics$ethnicity)
religionCov <- coCovar(demographics$religion)
delinquencyCov <- varCovar(delinquency)
delinquencyBeh <- sienaDependent(delinquency, type = "behavior")

mymodel <- sienaModelCreate(useStdInits = FALSE, projname = "myproject")

# model 1
mydata.0 <- sienaDataCreate(siena.nets, primaryCov, sexCov, ageCov, 
    ethnicityCov, religionCov, delinquencyCov, changes)

myeff.0 <- getEffects(mydata.0)
myeff.0 <- includeEffects(myeff.0, transTrip)
myeff.0 <- includeEffects(myeff.0, transTies)
myeff.0 <- includeEffects(myeff.0, cycle3)
myeff.0 <- includeEffects(myeff.0, outPopSqrt)
myeff.0 <- includeEffects(myeff.0, egoX, interaction1 = "sexCov")
# sameX = nodematch; simX = reverse of absdiff
myeff.0 <- includeEffects(myeff.0, altX, interaction1 = "sexCov")
myeff.0 <- includeEffects(myeff.0, sameX, interaction1 = "sexCov")
myeff.0 <- includeEffects(myeff.0, X, interaction1 = "primaryCov")
myeff.0 <- includeEffects(myeff.0, inPopSqrt)
myeff.0 <- includeEffects(myeff.0, outActSqrt)

saom.0 <- siena07(mymodel, data = mydata.0, effects = myeff.0, batch = TRUE, 
    verbose = FALSE, silent = TRUE, returnDeps = TRUE)  # model 0 in the paper


# ==============================================================================
# RSIENA REPLICATION FOR THE FIRST THREE TIME POINTS (TRAINING DATA)
# ==============================================================================

# define composition change
comp <- rep(list(c(1, 3)), 26)
comp[[21]] <- c(1, 2)  # actor 21 drops out after the second time step
changes <- sienaCompositionChange(comp)

# set up data structure and covariates
siena.nets <- sienaNet(array(c(friendship[[1]], friendship[[2]], 
    friendship[[3]]), dim = c(26, 26, 3)))

mymodel <- sienaModelCreate(useStdInits = FALSE, projname = "myproject")

mydata.0 <- sienaDataCreate(siena.nets, primaryCov, sexCov, ageCov, 
    ethnicityCov, religionCov, delinquencyCov, changes)

myeff.0 <- getEffects(mydata.0)
myeff.0 <- includeEffects(myeff.0, transTrip)
myeff.0 <- includeEffects(myeff.0, transTies)
myeff.0 <- includeEffects(myeff.0, cycle3)
myeff.0 <- includeEffects(myeff.0, outPopSqrt)
myeff.0 <- includeEffects(myeff.0, egoX, interaction1 = "sexCov")
myeff.0 <- includeEffects(myeff.0, altX, interaction1 = "sexCov")
myeff.0 <- includeEffects(myeff.0, sameX, interaction1 = "sexCov")
myeff.0 <- includeEffects(myeff.0, X, interaction1 = "primaryCov")
myeff.0 <- includeEffects(myeff.0, inPopSqrt)
myeff.0 <- includeEffects(myeff.0, outActSqrt)

saom.0.firstthree <- siena07(mymodel, data = mydata.0, effects = myeff.0, 
    batch = TRUE, verbose = FALSE, silent = TRUE, returnDeps = TRUE)

print(saom.0.firstthree$tconv.max)
# 0.167373
print(saom.0.firstthree$tconv)
#  [1] -0.013122555  0.026689053  0.007963647 -0.026753054  0.055831989
#  [6]  0.032694709  0.012580251  0.021299530  0.009227414  0.026745425
# [11] -0.029679497  0.012144204  0.070798009 -0.008958764


# ==============================================================================
# RSIENA OUT-OF-SAMPLE PREDICTION T3-T4
# ==============================================================================

# actor 21 is absent in both T3 and T4, so remove from all objects
fs3 <- handleMissings(friendship[[3]], na = 10, method = "remove")
fs4 <- handleMissings(friendship[[4]], na = 10, method = "remove")
primaryCov <- adjust(primary, fs4)
primaryCov <- coDyadCovar(primaryCov)
sexCov <- adjust(sex, fs4)
sexCov <- coCovar(sexCov)
siena.nets <- sienaNet(array(c(fs3, fs4), dim = c(25, 25, 2)))

mydata.0 <- sienaDataCreate(siena.nets, primaryCov, sexCov)

myeff.0 <- getEffects(mydata.0)
myeff.0 <- includeEffects(myeff.0, transTrip)
myeff.0 <- includeEffects(myeff.0, transTies)
myeff.0 <- includeEffects(myeff.0, cycle3)
myeff.0 <- includeEffects(myeff.0, outPopSqrt)
myeff.0 <- includeEffects(myeff.0, egoX, interaction1 = "sexCov")
myeff.0 <- includeEffects(myeff.0, altX, interaction1 = "sexCov")
myeff.0 <- includeEffects(myeff.0, sameX, interaction1 = "sexCov")
myeff.0 <- includeEffects(myeff.0, X, interaction1 = "primaryCov")
myeff.0 <- includeEffects(myeff.0, inPopSqrt)
myeff.0 <- includeEffects(myeff.0, outActSqrt)

saom.0.oos <- gof(saom.0.firstthree, statistics = c(esp, dsp, ideg, geodesic, 
    rocpr), nsim = nsim, parallel = parallel, ncpus = ncpus, 
    outofsample = TRUE, sienaData = mydata.0, sienaEffects = myeff.0)


# ==============================================================================
# TERGM DATA PREPARATION AND ESTIMATION
# ==============================================================================

friendship <- handleMissings(friendship, na = 10, method = "remove")
friendship <- handleMissings(friendship, na = NA, method = "fillmode")

for (i in 1:length(friendship)) {
  s <- adjust(sex, friendship[[i]])
  friendship[[i]] <- network(friendship[[i]])
  friendship[[i]] <- set.vertex.attribute(friendship[[i]], "sex", s)
  idegsqrt <- sqrt(degree(friendship[[i]], cmode = "indegree"))
  friendship[[i]] <- set.vertex.attribute(friendship[[i]], "idegsqrt", idegsqrt)
  odegsqrt <- sqrt(degree(friendship[[i]], cmode = "outdegree"))
  friendship[[i]] <- set.vertex.attribute(friendship[[i]], "odegsqrt", odegsqrt)
}

tergm.0 <- mtergm(friendship ~ edges + mutual + ttriple + transitiveties + 
    ctriple + nodeicov("idegsqrt") + nodeicov("odegsqrt") + 
    nodeocov("odegsqrt") + nodeofactor("sex") + nodeifactor("sex") + 
    nodematch("sex") + edgecov(primary) + memory("stability"), 
    control = control.ergm(MCMC.samplesize = 5000, MCMC.interval = 3000))


# ==============================================================================
# TERGM FOR THE FIRST THREE TIME STEPS (TRAINING DATA)
# ==============================================================================

tergm.0.firstthree <- mtergm(friendship[1:3] ~ edges + mutual + ttriple 
    + transitiveties + ctriple + nodeicov("idegsqrt") + nodeicov("odegsqrt") 
    + nodeocov("odegsqrt") + nodeofactor("sex") + nodeifactor("sex") 
    + nodematch("sex") + edgecov(primary) + memory("stability"), 
    control = control.ergm(MCMC.samplesize = 5000, MCMC.interval = 3000))


# ==============================================================================
# TERGM OUT-OF-SAMPLE PREDICTION T3-T4
# ==============================================================================

tergm.0.oos <- gof(tergm.0.firstthree, nsim = nsim, target = friendship[[4]], 
    formula = friendship[3:4] ~ edges + mutual + ttriple + transitiveties 
    + ctriple + nodeicov("idegsqrt") + nodeicov("odegsqrt") 
    + nodeocov("odegsqrt") + nodeofactor("sex") + nodeifactor("sex") 
    + nodematch("sex") + edgecov(primary) + memory("stability"), 
    statistics = c(esp, dsp, ideg, geodesic, rocpr), parallel = parallel, 
    ncpus = ncpus)

# save objects
save(saom.0, saom.0.firstthree, saom.0.oos, tergm.0, tergm.0.firstthree, 
    tergm.0.oos, file = "knecht-models.RData")


# ==============================================================================
# TEXREG: PRODUCE TABLES OF THE SAOM AND TERGM RESULTS
# ==============================================================================

# full models with all time steps; true re-analysis
coefnames <- c("Rate parameter period 1", "Rate parameter period 2", 
    "Rate parameter period 3", "Density/edges", "Reciprocity", 
    "Transitive triplets", "Cyclic triplets", "Transitive ties", 
    "Indegree of alter (sqrt)", "Outdegree of alter (sqrt)", 
    "Outdegree of ego (sqrt)", "Same primary school", "Male (alter)", 
    "Male (ego)", "Male (match)", "Density/edges", "Reciprocity", 
    "Transitive triplets", "Transitive ties", "Cyclic triplets", 
    "Indegree of alter (sqrt)", "Outdegree of alter (sqrt)", 
    "Outdegree of ego (sqrt)", "Male (ego)", "Male (alter)", "Male (match)", 
    "Same primary school", "Dyadic stability")

screenreg(list(saom.0, tergm.0), custom.model.names = c("SAOM", "TERGM"), 
    custom.coef.names = coefnames, reorder.coef = c(4:15, 1:3, 16), 
    include.iterations = FALSE, file = "knecht-table1.txt", single.row = TRUE, 
    include.nobs = FALSE)

texreg(list(saom.0, tergm.0), custom.model.names = c("SAOM", "TERGM"), 
    custom.coef.names = coefnames, reorder.coef = c(4:15, 1:3, 16), 
    include.iterations = FALSE, file = "knecht-table1.tex", single.row = TRUE, 
    booktabs = TRUE, dcolumn = TRUE, use.packages = FALSE, 
    caption = "Re-analysis of Model 0 using SAOM and TERGM.", 
    label = "tab:knecht", include.nobs = FALSE)

# without last time step; training data
coefnames2 <- coefnames[-3]

screenreg(list(saom.0.firstthree, tergm.0.firstthree), 
    custom.model.names = c("SAOM", "TERGM"), custom.coef.names = coefnames2, 
    reorder.coef = c(3:14, 1:2, 15), include.iterations = FALSE, 
    file = "knecht-table2.txt", single.row = TRUE, include.nobs = FALSE)

texreg(list(saom.0.firstthree, tergm.0.firstthree), 
    custom.model.names = c("SAOM", "TERGM"), custom.coef.names = coefnames2, 
    reorder.coef = c(3:14, 1:2, 15), include.iterations = FALSE, 
    file = "knecht-table2.tex", single.row = TRUE, booktabs = TRUE, 
    dcolumn = TRUE, use.packages = FALSE, 
    caption = "Re-analysis based on the first three time steps.", 
    label = "tab:knecht", include.nobs = FALSE)


# ==============================================================================
# GOODNESS-OF-FIT PLOT FOR TIE PREDICTION
# ==============================================================================

t_r_x <- tergm.0.oos$`Tie prediction`$roc@x.values[[1]]
s_r_x <- saom.0.oos$`Tie prediction`$roc@x.values[[1]]
t_r_x_rg <- tergm.0.oos$`Tie prediction`$roc.rgraph@x.values[[1]]
t_p_x <- tergm.0.oos$`Tie prediction`$pr@x.values[[1]]
s_p_x <- saom.0.oos$`Tie prediction`$pr@x.values[[1]]
t_p_x_rg <- tergm.0.oos$`Tie prediction`$pr.rgraph@x.values[[1]]

t_r_y <- tergm.0.oos$`Tie prediction`$roc@y.values[[1]]
s_r_y <- saom.0.oos$`Tie prediction`$roc@y.values[[1]]
t_r_y_rg <- tergm.0.oos$`Tie prediction`$roc.rgraph@y.values[[1]]
t_p_y <- tergm.0.oos$`Tie prediction`$pr@y.values[[1]]
s_p_y <- saom.0.oos$`Tie prediction`$pr@y.values[[1]]
t_p_y_rg <- tergm.0.oos$`Tie prediction`$pr.rgraph@y.values[[1]]

t_auc_r <- tergm.0.oos$`Tie prediction`$auc.roc
s_auc_r <- saom.0.oos$`Tie prediction`$auc.roc
t_auc_r_rg <- tergm.0.oos$`Tie prediction`$auc.roc.rgraph
t_auc_p <- tergm.0.oos$`Tie prediction`$auc.pr
s_auc_p <- saom.0.oos$`Tie prediction`$auc.pr
t_auc_p_rg <- tergm.0.oos$`Tie prediction`$auc.pr.rgraph

dat <- data.frame(x = c(t_r_x, 
                        s_r_x, 
                        t_r_x_rg, 
                        t_p_x, 
                        s_p_x, 
                        t_p_x_rg), 
                  y = c(t_r_y, 
                        s_r_y, 
                        t_r_y_rg, 
                        t_p_y, 
                        s_p_y, 
                        t_p_y_rg), 
                  Model = c(rep("TERGM", length(t_r_x)), 
                            rep("SAOM", length(s_r_x)), 
                            rep("Random", length(t_r_x_rg)), 
                            rep("TERGM", length(t_p_x)), 
                            rep("SAOM", length(s_p_x)), 
                            rep("Random", length(t_p_x_rg))), 
                  Measure = c(rep("ROC", length(t_r_x) + length(s_r_x) + 
                                    length(t_r_x_rg)), 
                              rep("PR", length(t_p_x) + length(s_p_x) + 
                                    length(t_p_x_rg)))
                  )

dat_roc <- dat[dat$Measure == "ROC", ]
dat_pr <- dat[dat$Measure == "PR", ]

dat_auc <- data.frame(AUC = c(t_auc_r, 
                              s_auc_r, 
                              t_auc_r_rg, 
                              t_auc_p, 
                              s_auc_p, 
                              t_auc_p_rg), 
                      Model = rep(c("TERGM", "SAOM", "Random"), 2), 
                      Measure = c(rep("ROC", 3), rep("PR", 3))
                      )

p1 <- ggplot(dat_roc, aes(x = x, y = y, colour = Model, Measure = Measure)) + 
  geom_line(lwd = 1) + 
  theme_bw() + 
  theme(legend.position = c(0.82, 0.2)) + 
  ggtitle("Receiver-Operating Characteristics (ROC)") + 
  ylab("True positive rate") + 
  xlab("False positive rate") + 
  scale_colour_manual(values = c(col_pos, col_saom, col_tergm))

p2 <- ggplot(dat_pr, aes(x = x, y = y, colour = Model, Measure = Measure)) + 
  geom_line(lwd = 1) + 
  theme_bw() + 
  theme(legend.position = c(0.82, 0.8)) + 
  ggtitle("Precision-Recall Curve (PR)") + 
  ylab("Positive predictive value (precision)") + 
  xlab("True positive rate (recall)") + 
  scale_colour_manual(values = c(col_pos, col_saom, col_tergm))

p3 <- ggplot(dat_auc, aes(y = AUC, fill = Model, x = Measure)) + 
  geom_bar(stat = "identity", position = position_dodge(width = 0.94)) + 
  theme_bw() + 
  theme(legend.position = c(0.15, 0.8)) + 
  ggtitle("Area under the Curve") + 
  scale_fill_manual(values = c(col_pos, col_saom, col_tergm))

pdf("knecht-rocpr.pdf", width = 13, height = 4)
grid.arrange(p1, p2, p3, ncol = 3)
dev.off()


# ==============================================================================
# GOODNESS-OF-FIT PLOT FOR AUXILIARY NETWORK STATISTICS (BOXPLOTS)
# ==============================================================================

t_esp <- tergm.0.oos[[1]]$raw
t_dsp <- tergm.0.oos[[2]]$raw
t_ind <- tergm.0.oos[[3]]$raw
t_geo <- tergm.0.oos[[4]]$raw
s_esp <- saom.0.oos[[1]]$raw
s_dsp <- saom.0.oos[[2]]$raw
s_ind <- saom.0.oos[[3]]$raw
s_geo <- saom.0.oos[[4]]$raw

t_esp_obs <- tergm.0.oos[[1]]$stats[, 1]
t_dsp_obs <- tergm.0.oos[[2]]$stats[, 1]
t_ind_obs <- tergm.0.oos[[3]]$stats[, 1]
t_geo_obs <- tergm.0.oos[[4]]$stats[, 1]
s_esp_obs <- saom.0.oos[[1]]$stats[, 1]
s_dsp_obs <- saom.0.oos[[2]]$stats[, 1]
s_ind_obs <- saom.0.oos[[3]]$stats[, 1]
s_geo_obs <- saom.0.oos[[4]]$stats[, 1]

dat_box <- data.frame(Frequency = c(unlist(as.matrix(t_esp)), 
                                    unlist(as.matrix(t_dsp)), 
                                    unlist(as.matrix(t_ind)), 
                                    unlist(as.matrix(t_geo)), 
                                    unlist(as.matrix(s_esp)), 
                                    unlist(as.matrix(s_dsp)), 
                                    unlist(as.matrix(s_ind)), 
                                    unlist(as.matrix(s_geo))), 
                      Parameter = c(rep(rownames(t_esp), nsim), 
                                    rep(rownames(t_dsp), nsim), 
                                    rep(rownames(t_ind), nsim), 
                                    rep(rownames(t_geo), nsim), 
                                    rep(rownames(s_esp), nsim), 
                                    rep(rownames(s_dsp), nsim), 
                                    rep(rownames(s_ind), nsim), 
                                    rep(rownames(s_geo), nsim)), 
                      Statistic = c(rep("Edge-wise shared partners", 
                                        nrow(t_esp) * nsim), 
                                    rep("Dyad-wise shared partners", 
                                        nrow(t_dsp) * nsim), 
                                    rep("Indegree distribution", 
                                        nrow(t_ind) * nsim), 
                                    rep("Geodesic distances", 
                                        nrow(t_geo) * nsim), 
                                    rep("Edge-wise shared partners", 
                                        nrow(s_esp) * nsim), 
                                    rep("Dyad-wise shared partners", 
                                        nrow(s_dsp) * nsim), 
                                    rep("Indegree distribution", 
                                        nrow(s_ind) * nsim), 
                                    rep("Geodesic distances", 
                                        nrow(s_geo) * nsim)), 
                      Model = c(rep("TERGM", nsim * (nrow(t_esp) + 
                                                     nrow(t_dsp) + 
                                                     nrow(t_ind) + 
                                                     nrow(t_geo))), 
                                rep("SAOM", nsim * (nrow(s_esp) + 
                                                    nrow(s_dsp) + 
                                                    nrow(s_ind) + 
                                                    nrow(s_geo)))), 
                      Observed = c(rep(t_esp_obs, nsim), 
                                   rep(t_dsp_obs, nsim), 
                                   rep(t_ind_obs, nsim), 
                                   rep(t_geo_obs, nsim), 
                                   rep(s_esp_obs, nsim), 
                                   rep(s_dsp_obs, nsim), 
                                   rep(s_ind_obs, nsim), 
                                   rep(s_geo_obs, nsim))
                      )

dat_box$Parameter <- as.character(dat_box$Parameter)
dat_box$Parameter <- factor(dat_box$Parameter, levels = as.character(sort(
    as.numeric(as.character(unique(dat_box$Parameter))))))

dat_esp <- dat_box[dat_box$Statistic == "Edge-wise shared partners" & 
    !dat_box$Parameter %in% c("13", "14", "15", "16", "17", "18"), ]
dat_dsp <- dat_box[dat_box$Statistic == "Dyad-wise shared partners" & 
    !dat_box$Parameter %in% c("12", "13", "14", "15", "16", "17", "18"), ]
dat_ind <- dat_box[dat_box$Statistic == "Indegree distribution" & 
    !dat_box$Parameter %in% c("16", "17", "18", "19", "20"), ]
dat_geo <- dat_box[dat_box$Statistic == "Geodesic distances" & 
    !dat_box$Parameter %in% c("7", "8", "9", "10"), ]

box_1 <- ggplot(dat_esp, aes(x = Parameter, y = Frequency, fill = Model)) + 
  geom_boxplot(colour = "grey", outlier.shape = NA) + 
  coord_cartesian(ylim = c(0, 45)) + 
  theme_bw() + 
  theme(legend.position = "none") + 
  stat_summary(aes(y = Observed, group = 1), 
               fun.y = median, 
               geom = "line", 
               lwd = 1.5) + 
  theme(axis.title.x = element_blank(), 
        legend.position = c(0.86, 0.82)) + 
  ggtitle("Edge-wise shared partners") + 
  scale_fill_manual(values = c(col_saom, col_tergm))

box_2 <- ggplot(dat_dsp, aes(x = Parameter, y = Frequency, fill = Model)) + 
  geom_boxplot(colour = "grey", outlier.shape = NA) + 
  coord_cartesian(ylim = c(0, 325)) + 
  theme_bw() + 
  theme(legend.position = "none") + 
  stat_summary(aes(y = Observed, group = 1), 
               fun.y = median, 
               geom = "line", 
               lwd = 1.5) + 
  theme(axis.title.x = element_blank(), 
        axis.title.y = element_blank(), 
        legend.position = c(0.86, 0.82)) + 
  ggtitle("Dyad-wise shared partners") + 
  scale_fill_manual(values = c(col_saom, col_tergm))

box_3 <- ggplot(dat_ind, aes(x = Parameter, y = Frequency, fill = Model)) + 
  geom_boxplot(colour = "grey", outlier.shape = NA) + 
  coord_cartesian(ylim = c(0, 6.8)) + 
  theme_bw() + 
  theme(legend.position = "none") + 
  stat_summary(aes(y = Observed, group = 1), 
               fun.y = median, 
               geom = "line", 
               lwd = 1.5) + 
  theme(axis.title.x = element_blank(), 
        legend.position = c(0.86, 0.82)) + 
  ggtitle("Indegree distribution") + 
  scale_fill_manual(values = c(col_saom, col_tergm))

box_4 <- ggplot(dat_geo, aes(x = Parameter, y = Frequency, fill = Model)) + 
  geom_boxplot(colour = "grey", outlier.shape = NA) + 
  coord_cartesian(ylim = c(0, 260)) + 
  theme_bw() + 
  stat_summary(aes(y = Observed, group = 1), 
               fun.y = median, 
               geom = "line", 
               lwd = 1.5) + 
  theme(axis.title.x = element_blank(), 
        axis.title.y = element_blank(), 
        legend.position = c(0.75, 0.82)) + 
  ggtitle("Geodesic distances") + 
  scale_fill_manual(values = c(col_saom, col_tergm))

pdf("knecht-boxplot.pdf", width = 8, height = 6)
grid.arrange(box_1, box_2, box_3, box_4, ncol = 2)
dev.off()


# ==============================================================================
# MCMC trace and density
# ==============================================================================

mcsamp <- as.matrix(tergm.0.firstthree@ergm$sample)
mcsamp <- mcsamp[, -ncol(mcsamp)]
colnames(mcsamp) <- c("Edges", 
                      "Reciprocity", 
                      "Transitive triplets", 
                      "Transitive ties", 
                      "Cyclic triplets", 
                      "Indegree of alter (sqrt)", 
                      "Outdegree of alter (sqrt)", 
                      "Outdegree of ego (sqrt)", 
                      "Male (ego)", 
                      "Male (alter)", 
                      "Male (match)", 
                      "Same primary school", 
                      "Dyadic stability"
                      )
mcsamp <- melt(mcsamp)
colnames(mcsamp) <- c("Index", "Statistic", "Value")

pdf("knecht-mcmctrace.pdf", width = 10, height = 13)
ggplot(mcsamp, aes(y = Value, x = Index)) + 
    theme_bw() + 
    geom_line() + 
    facet_wrap(~ Statistic, scales = "free", ncol = 3) + 
    theme(strip.background = element_blank(), 
          axis.title.x = element_blank(), 
          axis.title.y = element_blank())
dev.off()

pdf("knecht-mcmcdensity.pdf", width = 10, height = 13)
ggplot(mcsamp, aes(x = Value)) + 
    theme_bw() + 
    geom_density() + 
    facet_wrap(~ Statistic, scales = "free", ncol = 3) + 
    theme(strip.background = element_blank(), 
          axis.title.x = element_blank(), 
          axis.title.y = element_blank())
dev.off()